---
title: 'Section 4: Election Surveys'
author: "Milan Svolik"
date: "4/20/2023"
output:
  pdf_document: default
  html_document:
    df_print: paged
---

```{r setup, include=FALSE}
rm(list = ls(all = TRUE))
library(tidyverse)
library(stargazer)
library(lmtest)
library(reshape2)
library(foreign)
library(ggplot2)
library(broom)
library(estimatr)
library(texreg)


set.seed(5877)
```

## Load the merged survey data 
* Two proprietary surveys conducted by the survey agencies KONDA and SONAR
```{R}
load("data/df_surveys.RData")
```

# Table 2
* Election surveys: The joint distribution of March and June outcomes
* weight_s5: survey weights post-stratified to match 2019 administrative population totals for the joint distribution of age, gender, and education as well as the margins from the 2018 legislative and the March and June 2019 mayoral elections
```{R}
df_analyze <- df_surveys %>%
  mutate(weight = weight_s5) %>%
  filter(!is.na(weight))

dim(df_analyze)
```

### The joint distribution of March and June outcomes
```{R}
df_P <- df_analyze %>%
  group_by(vote_march_3, vote_june_3) %>%
  summarise(n = n(), weight=mean(weight)) %>%
  group_by(vote_march_3, vote_june_3) %>%
  mutate(n_weighted = n*weight) %>%
  ungroup() %>%
  mutate(
    p=n_weighted/sum(n_weighted), 
    percentage=round(100*p, 2))

df_P
```

### The three mechanisms
```{R}
switching <- df_P$p[2]-df_P$p[4]
backlash <- df_P$p[8]-df_P$p[6]
disenchantment <- df_P$p[3]-df_P$p[7]

round(100*switching, 2)
round(100*backlash, 2)
round(100*disenchantment, 2)
```

## Estimate the confidence intervals via the bootstrap
* use post-stratification weights as boostrap probabilities to propagate sampling uncertainty into the final estimates
```{R}
boot_rep <- 10000
mechs_out <- c()

# USE POST-STRATIFICATION WEIGHTS AS BOOTSTRAP PROBABILITIES
df_toboot <- df_analyze %>%
  mutate(rid = seq(1:n())) %>%
  select(rid, vote_march_3, vote_june_3, weight)
  
# BOOTSTRAP 
p_boot_save <- c()
for (i in 1:boot_rep) {
  
  # DRAW RIDS
  rid_boot <- sample(df_toboot$rid, replace=TRUE, prob=df_toboot$weight)
  
  # EXTRACT THE DATA BASED ON THE DRAW
  df_boot <- df_toboot[rid_boot,]
  
  # COMPUTE THE JOINT PMF
  p_boot <- table(df_boot$vote_march_3, df_boot$vote_june_3)/sum(table(df_boot$vote_march_3, df_boot$vote_june_3))
  
  # TRANSFORM TO A VECTOR THAT IS A ROWWISE TRANSFORMATION OF THE MATRIX
  p_boot <- c(t(p_boot))

  # SAVE IT
  p_boot_save <- rbind(p_boot_save, p_boot)
}
```


## Extract the bootstrap draws of the joint distribution of March and June outcomes and the mechanisms
```{R}
switching_boot <- c()
backlash_boot <- c()
disenchantment_boot <- c()
  
for (j in 1:boot_rep) {
  P <- matrix(p_boot_save[j,], nrow=3, byrow=TRUE)
  
  switching <- P[1,2] - P[2,1]
  switching_boot <- rbind(switching_boot, switching)
  
  backlash <- P[3,2] - P[2,3]
  backlash_boot <- rbind(backlash_boot, backlash)
  
  disenchantment <- P[1,3] - P[3,1]
  disenchantment_boot <- rbind(disenchantment_boot, disenchantment)
}    
```

## Bootstrap estimates of the mean and the confidence intervals of the joint distribution of March and June outcomes
* These are the estimates reported in Table 2
```{R}
p_boot_mean <- apply(p_boot_save, 2, mean, na.rm=TRUE)
p_boot_lower <- apply(p_boot_save, 2, quantile, probs=c(.025), na.rm=TRUE)
p_boot_upper <- apply(p_boot_save, 2, quantile, probs=c(.975), na.rm=TRUE)

P_boot_mean <- matrix(p_boot_mean, nrow=3, byrow=TRUE)
round(100*P_boot_mean, 2)

P_boot_lower <- matrix(p_boot_lower, nrow=3, byrow=TRUE)
round(100*P_boot_lower, 2)

P_boot_upper <- matrix(p_boot_upper, nrow=3, byrow=TRUE)
round(100*P_boot_upper, 2)
```

## Bootstrap estimates of the margins of the joint distribution of March and June outcomes
* These are the estimates reported in Table 2
```{R}
round(100*rowSums(P_boot_mean), 2)
round(100*colSums(P_boot_mean), 2)

round(100*rowSums(P_boot_lower), 2)
round(100*colSums(P_boot_lower), 2)

round(100*rowSums(P_boot_upper), 2)
round(100*colSums(P_boot_upper), 2)
```

## Bootstrap estimates of the mean and the confidence intervals of the three mechanisms
```{R}
switching_boot_mean <- apply(switching_boot, 2, mean, na.rm=TRUE)
switching_boot_lower <- apply(switching_boot, 2, quantile, probs=c(.025), na.rm=TRUE)
switching_boot_upper <- apply(switching_boot, 2, quantile, probs=c(.975), na.rm=TRUE)

backlash_boot_mean <- apply(backlash_boot, 2, mean, na.rm=TRUE)
backlash_boot_lower <- apply(backlash_boot, 2, quantile, probs=c(.025), na.rm=TRUE)
backlash_boot_upper <- apply(backlash_boot, 2, quantile, probs=c(.975), na.rm=TRUE)

disenchantment_boot_mean <- apply(disenchantment_boot, 2, mean, na.rm=TRUE)
disenchantment_boot_lower <- apply(disenchantment_boot, 2, quantile, probs=c(.025), na.rm=TRUE)
disenchantment_boot_upper <- apply(disenchantment_boot, 2, quantile, probs=c(.975), na.rm=TRUE)

# CONSOLIDATE
switching_boot_out <- c(switching_boot_mean, switching_boot_lower, switching_boot_upper)
backlash_boot_out <- c(backlash_boot_mean, backlash_boot_lower, backlash_boot_upper)
disenchantment_boot_out <- c(disenchantment_boot_mean, disenchantment_boot_lower, disenchantment_boot_upper)

round(100*switching_boot_out, 2)
round(100*backlash_boot_out, 2)
round(100*disenchantment_boot_out, 2)
```

# Figure 4
* Election surveys: The joint distribution of March and June outcomes

## Contruct the plot data
* weight_s5: survey weights post-stratified to match 2019 administrative population totals for the joint distribution of age, gender, and education as well as the margins from the 2018 legislative and the March and June 2019 mayoral elections
```{R}
df_analyze <- df_surveys %>%
  mutate(weight = weight_s5) %>%
  filter(!is.na(weight))
```

### A FUNCTION THAT CONDUCTS THE BOOTSTRAP
```{R}
mech_boot_fn <- function(data) {

  boot_rep <- 10000

  # BOOTSTRAP 
  p_boot_save <- c()
  for (i in 1:boot_rep) {
    
    # DRAW RIDS
    rid_boot <- sample(data$rid, replace=TRUE, prob=data$weight)
    
    # EXTRACT THE DATA BASED ON THE DRAW
    df_boot <- data[rid_boot,]
    
    # COMPUTE THE JOINT PMF
    p_boot <- table(df_boot$vote_march_3, df_boot$vote_june_3)/sum(table(df_boot$vote_march_3, df_boot$vote_june_3))
    
    # TRANSFORM TO A VECTOR THAT IS A ROWWISE TRANSFORMATION OF THE MATRIX
    p_boot <- c(t(p_boot))
    
    # SAVE IT
    p_boot_save <- rbind(p_boot_save, p_boot)
  }      
  
  # EXTRACT THE BOOTSTRAP DRAWS OF THE JOINT DISTRIBUTION OF MARCH AND JUNE OUTCOMES AND THE MECHANISMS
  switching_boot <- c()
  backlash_boot <- c()
  disenchantment_boot <- c()
  
  for (j in 1:boot_rep) {
    P <- matrix(p_boot_save[j,], nrow=3, byrow=TRUE)
    
    switching <- P[1,2] - P[2,1]
    switching_boot <- rbind(switching_boot, switching)
    
    backlash <- P[3,2] - P[2,3]
    backlash_boot <- rbind(backlash_boot, backlash)
    
    disenchantment <- P[1,3] - P[3,1]
    disenchantment_boot <- rbind(disenchantment_boot, disenchantment)
  }    
  
  ## BOOTSTRAP ESTIMATES OF THE MEAN AND THE CONFIDENCE INTERVALS OF THE THREE MECHANISMS
  switching_boot_mean <- apply(switching_boot, 2, mean, na.rm=TRUE)
  switching_boot_lower <- apply(switching_boot, 2, quantile, probs=c(.025), na.rm=TRUE)
  switching_boot_upper <- apply(switching_boot, 2, quantile, probs=c(.975), na.rm=TRUE)
  
  backlash_boot_mean <- apply(backlash_boot, 2, mean, na.rm=TRUE)
  backlash_boot_lower <- apply(backlash_boot, 2, quantile, probs=c(.025), na.rm=TRUE)
  backlash_boot_upper <- apply(backlash_boot, 2, quantile, probs=c(.975), na.rm=TRUE)
  
  disenchantment_boot_mean <- apply(disenchantment_boot, 2, mean, na.rm=TRUE)
  disenchantment_boot_lower <- apply(disenchantment_boot, 2, quantile, probs=c(.025), na.rm=TRUE)
  disenchantment_boot_upper <- apply(disenchantment_boot, 2, quantile, probs=c(.975), na.rm=TRUE)
  
  # CONSOLIDATE
  switching_boot_out <- c(switching_boot_mean, switching_boot_lower, switching_boot_upper)
  backlash_boot_out <- c(backlash_boot_mean, backlash_boot_lower, backlash_boot_upper)
  disenchantment_boot_out <- c(disenchantment_boot_mean, disenchantment_boot_lower, disenchantment_boot_upper)
  
  return(list(switching_boot_out, backlash_boot_out, disenchantment_boot_out))
}
```

### FILTER TO 2018 CHP VOTERS
```{R}
# USE POST-STRATIFICATION WEIGHTS AS BOOTSTRAP PROBABILITIES
df_toboot <- df_analyze %>%
  filter(vote_2018=="CHP") %>%
  mutate(rid = seq(1:n())) %>%
  select(rid, vote_march_3, vote_june_3, weight)

# BOOTSTRAP 
boot_out_list <- mech_boot_fn(df_toboot)
    
# SAVE FOR PLOTING
df_plot_CHP <- data.frame(rbind(
  c(-1, 1, boot_out_list[[1]]),
  c(-1, 2, boot_out_list[[2]]), 
  c(-1, 3, boot_out_list[[3]])))
```

### FILTER TO 2018 AKP VOTERS
```{R}
# USE POST-STRATIFICATION WEIGHTS AS BOOTSTRAP PROBABILITIES
df_toboot <- df_analyze %>%
  filter(vote_2018=="AKP") %>%
  mutate(rid = seq(1:n())) %>%
  select(rid, vote_march_3, vote_june_3, weight)

# BOOTSTRAP 
boot_out_list <- mech_boot_fn(df_toboot)
    
# SAVE FOR PLOTING
df_plot_AKP <- data.frame(rbind(
  c(1, 1, boot_out_list[[1]]),
  c(1, 2, boot_out_list[[2]]), 
  c(1, 3, boot_out_list[[3]])))
```

## FILTER TO 2018 NEITHER AKP OR CHP
```{R}
# USE POST-STRATIFICATION WEIGHTS AS BOOTSTRAP PROBABILITIES
df_toboot <- df_analyze %>%
  filter(vote_2018!="AKP" & vote_2018!="CHP") %>%
  mutate(rid = seq(1:n())) %>%
  select(rid, vote_march_3, vote_june_3, weight)

# BOOTSTRAP 
boot_out_list <- mech_boot_fn(df_toboot)
    
# SAVE FOR PLOTING
df_plot_OTHER <- data.frame(rbind(
  c(0, 1, boot_out_list[[1]]),
  c(0, 2, boot_out_list[[2]]), 
  c(0, 3, boot_out_list[[3]])))
```

## PLOT
```{r}
df_plot <- rbind(df_plot_CHP, df_plot_OTHER, df_plot_AKP)
colnames(df_plot) <- c("vote2018", "mech", "estimate", "ci_lower", "ci_upper")

df_plot <- df_plot %>%
    mutate(mech=as.factor(mech)) %>%
    mutate(vote2018=as.factor(vote2018))
  
plot_group_labels <- c("vote switching", "backlash", "disengagement")
plot_x_label <- c("Vote in the 2018 legislative election")
plot_x_labels <- c("CHP", "Other", "AKP")
plot_x_dodge <- .1
df_plot %>%
  ggplot(aes(x=vote2018, y=100*estimate), col=mech) +
  geom_hline(yintercept=0, col="grey50") +
  geom_line(aes(col=mech, group=mech, linetype = mech),  size=1,  position = position_dodge(plot_x_dodge)) + 
  geom_errorbar(aes(ymin=100*ci_lower, ymax=100*ci_upper, col=mech), width=0.1,  position = position_dodge(plot_x_dodge)) +
  geom_point(aes(col=mech, shape=mech),  size=2, position = position_dodge(plot_x_dodge)) +
  scale_shape_manual(name  ="", values=c(19,15,17), labels=plot_group_labels) +
  scale_linetype_manual(name  ="", values=c("solid", "dashed", "dotted"), labels=plot_group_labels)+
  scale_color_manual(name  ="", values=c("black", "red", "blue"), labels=plot_group_labels) +
  theme_minimal() +
  scale_x_discrete(name = plot_x_label,
        labels=plot_x_labels) +
  scale_y_continuous(name = "% of subjects (within each subgroup)")

# SAVE THE PLOT
ggsave("figures/figure_4.png", dpi = 1200, width = 10, height = 6, units = "in")
ggsave("figures/figure_4.eps", dpi = 1200, width = 10, height = 6, units = "in")
ggsave("figures/figure_4.pdf", dpi = 1200, width = 10, height = 6, units = "in")
```

# Table 3
* ELECTION SURVEYS: HETEROGENEITY IN PUNISHMENT BY RESPONDENT COVARIATES
* Just like earlier we are working with vote_3, which implies that:
    +   third-party voters are treated as abstainers
    +   I am dropping the undecided
* weight_s5: survey weights post-stratified to match 2019 administrative population totals for the joint distribution of age, gender, and education as well as the margins from the 2018 legislative and the March and June 2019 mayoral elections
```{R}
rm(list = ls(all = TRUE))
load("data/df_surveys.RData")
```

### PREPARE THE DATA FOR ESTIMATION
```{r}
# SELECT THE VARIABLES TO BE USED
df_temp <- df_surveys %>%
  mutate(weight = weight_s5) %>%
  filter(!is.na(weight)) %>%
  select(vote_march_3, vote_june_3, vote_2018, dg_sex, dg_age, dg_edu, turkish, istanbul_born, weight, ilce_name)

colnames(df_temp)
dim(df_temp)

# RECODE AGE TO HAVE THE SAME CATEGORIES AS IN THE SURVEY EXPERIMENTS
table(df_temp$dg_age)
df_temp$dg_age <-  fct_recode(df_temp$dg_age, 
                         `18-34` = "18-24",
                         `18-34` = "25-34",
                         `35-54` = "35-44",
                         `35-54` = "45-54",
                         `55+` = "55-64",
                         `55+` = "65-up")
table(df_temp$dg_age)
```

## VOTE SWITCHING
* THE SAMPLE: all who voted for the AKP in March
  + MARCH: 0 if the subject voted for the AKP in March, NA otherwise
  + JUNE: 1 if the subject voted for the AKP in March but voted CHP in June; 0 otherwise
* THE DV y: the MARCH to JUNE shift
  + 0 if none
  + 1 if yes
```{r}
# DV
df_vote_switching <- df_temp
table(df_vote_switching$vote_march_3, df_vote_switching$vote_june_3)

vote_march <- case_when(df_vote_switching$vote_march_3=="AKP" ~ 0)
vote_june <- case_when(df_vote_switching$vote_march_3=="AKP" & df_vote_switching$vote_june_3=="AKP" ~ 0, 
                            df_vote_switching$vote_march_3=="AKP" & df_vote_switching$vote_june_3=="CHP" ~ 1)

df_vote_switching <- df_vote_switching %>%
  mutate(vote_march, vote_june) %>%
  mutate(Y = vote_june - vote_march)

df_vote_switching %>%
  group_by(vote_march, vote_june) %>%
  summarise(N=n(), mean(Y))

df_ols <- df_vote_switching %>%
  select(Y, dg_age, dg_sex, dg_edu, turkish, istanbul_born, weight, ilce_name)

ols_vote_switching <- lm_robust(Y ~ .-weight-ilce_name, data=df_ols, weights = weight)
plotreg(list(ols_vote_switching))
```

## BACKLASH
* THE SAMPLE: ABS (including vote blank or vote third party) in March
  + MARCH: 0 if the subject ABS (abstained) in March, NA otherwise
  + JUNE: 1 if the subject ABS in March but voted CHP in June; 0 otherwise
* THE DV y: the MARCH to JUNE shift
  + 0 if none
  + 1 if yes
```{r}
# DV
df_backlash <- df_temp
table(df_backlash$vote_march_3, df_backlash$vote_june_3)

vote_march <- case_when(df_backlash$vote_march_3=="ABS" ~ 0)
vote_june <- case_when(df_backlash$vote_march_3=="ABS" & df_backlash$vote_june_3=="ABS" ~ 0, 
                            df_backlash$vote_march_3=="ABS" & df_backlash$vote_june_3=="CHP" ~ 1)

df_backlash <- df_backlash %>%
  mutate(vote_march, vote_june) %>%
  mutate(Y = vote_june - vote_march)

df_backlash %>%
  group_by(vote_march, vote_june) %>%
  summarise(N=n(), mean(Y))

df_ols <- df_backlash %>%
  select(Y, dg_age, dg_sex, dg_edu, turkish, istanbul_born, weight, ilce_name)

ols_backlash <- lm_robust(Y ~ .-weight-ilce_name, data=df_ols, weights = weight)
plotreg(list(ols_backlash))
```
## DISENCHANTMENT
* THE SAMPLE: AKP in March
  + MARCH: 0 if the subject voted for the AKP, NA otherwise
  + JUNE: 1 if the subject voted for the AKP in March but ABS (abstained) in June; 0 otherwise
* THE DV y: the MARCH to JUNE shift
  + 0 if none
  + 1 if yes
```{r}
# DV
df_disenchantment <- df_temp
table(df_disenchantment$vote_march_3, df_disenchantment$vote_june_3)

vote_march <- case_when(df_disenchantment$vote_march_3=="AKP" ~ 0)
vote_june <- case_when(df_disenchantment$vote_march_3=="AKP" & df_disenchantment$vote_june_3=="AKP" ~ 0, 
                            df_disenchantment$vote_march_3=="AKP" & df_disenchantment$vote_june_3=="ABS" ~ 1)

df_disenchantment <- df_disenchantment %>%
  mutate(vote_march, vote_june) %>%
  mutate(Y = vote_june - vote_march)

df_disenchantment %>%
  group_by(vote_march, vote_june) %>%
  summarise(N=n(), mean(Y))

df_ols <- df_disenchantment %>%
  select(Y, dg_age, dg_sex, dg_edu, turkish, istanbul_born, weight, ilce_name)

ols_disenchantment <- lm_robust(Y ~ .-weight-ilce_name, data=df_ols, weights = weight)
plotreg(list(ols_disenchantment))
```

## Report Table 3
```{R}
texreg(list(ols_vote_switching, ols_backlash, ols_disenchantment), include.ci = FALSE, digits = 3, custom.model.names = c("Vote Switching", "Backlash", "Disenchantment"), custom.coef.names = c("Intercept", "Age: 35-54", "Age: 55+", "Sex: male", "Education: High school", "Education: College or higher", "Turkish", "Istanbul born"), stars=c(0.01, 0.05, 0.1))
```

```